import numpy as np
import pandas as pd
import geopandas as gpd
import fiona
import glob
import os
import contextily as ctx
from scipy.spatial import cKDTree
from shapely.geometry import Point
import json
from tqdm.auto import tqdm
pd.set_option('min_rows', 30)
import sys
sys.path.append('..')
from importlib import reload
# import src.utils as utils
# reload(utils)
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (16, 16)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 10)
%%time
parcels = gpd.read_file('input/lds-nz-primary-parcels-CLIPPED-4326.gpkg')
parcels_sample = parcels.sample(10000)
# keep a centroid and polygon version - change between them according to need
parcels_sample['geometry_centroid_4326'] = parcels_sample.geometry.centroid.to_crs(4326)
parcels_sample['geometry_polygon_4326'] = parcels_sample.geometry.to_crs(4326)
parcels_sample['geometry_centroid_2193'] = parcels_sample.geometry.centroid.to_crs(2193)
parcels_sample['geometry_polygon_2193'] = parcels_sample.geometry.to_crs(2193)
<ipython-input-81-9e0513578c82>:2: UserWarning: Geometry is in a geographic CRS. Results from 'centroid' are likely incorrect. Use 'GeoSeries.to_crs()' to re-project geometries to a projected CRS before this operation. parcels_sample['geometry_centroid'] = parcels_sample.geometry.centroid
%%time
aup_zones = gpd.read_file('restricted/2016_Unitary_Plan_operational_15Nov/UP_OIP_15Nov2016_SHP/MASTER_UP_BaseZone.shp')
aup_zones = aup_zones.to_crs(2193)
aup_zones.sample(3)
CPU times: user 31.2 s, sys: 2.21 s, total: 33.4 s Wall time: 33.3 s
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 58724 | NaN | 20161111011 | None | 20160718211 | None | {0CCC66B0-9A15-477A-BE4F-FAE9C5FA9C15} | 5 | Coastal | None | None | 58725 | None | None | None | None | None | None | None | 62.153950 | 58.346375 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 59 | Coastal - Coastal Transition Zone | NaN | POLYGON Z ((1756089.130 5973206.276 0.000, 175... |
| 103279 | NaN | 20161111012 | None | 20160718211 | None | {111B7716-6401-457F-A260-9BA18D7E0DB9} | 5 | Coastal | None | None | 103280 | None | None | None | None | None | None | None | 372.662968 | 106.669373 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 30 | Coastal - General Coastal Marine Zone | NaN | POLYGON Z ((1810661.036 5993908.378 0.000, 181... |
| 54081 | NaN | 20161111011 | None | 20160718211 | None | {CD16D435-B3A5-4C26-BFC7-E6AF32E60231} | 7 | General | None | None | 54082 | None | None | None | None | None | None | None | 41502.960080 | 1179.183755 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 26 | Strategic Transport Corridor Zone | NaN | POLYGON Z ((1737071.120 5973948.034 0.000, 173... |
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, aup_zones[['ZONE', 'ZONE_resol', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'ZONE': 'LINZmatch_AUP_code', 'ZONE_resol': 'LINZmatch_AUP_name'})
CPU times: user 59.9 s, sys: 0 ns, total: 59.9 s Wall time: 1min
ax = parcels_sample[parcels_sample.LINZmatch_AUP_name.str.contains('Business')].sample(500).plot(alpha=0.5)
ctx.add_basemap(ax, crs=2193)
I've included all of these as rural:
['Rural - Mixed Rural Zone',
'Rural - Rural Coastal Zone',
'Rural - Countryside Living Zone',
'Rural - Rural Production Zone',
'Rural - Rural Conservation Zone',
'Rural - Waitakere Ranges Zone',
'Rural - Waitakere Foothills Zone']
rural_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('rural - ', na=False)]['ZONE'].unique()
# check that each rural zone code matches with a unique rural zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in rural_codes])
# dictionary mapping code to names
rural_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in rural_codes}
aup_zones[aup_zones.ZONE_resol.isna()]
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20853 | NaN | 20161111011 | None | 20160718211 | None | {2D83F680-9587-4D57-AF0B-CB7CA27C3D2C} | 6 | Special purpose zone | None | None | 20854 | None | None | None | None | None | None | None | 22.286945 | 72.629617 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.860 5903714.023 33.721, 17... |
| 121765 | NaN | 20161111010 | None | 20160718211 | None | {FD3E8BB4-1979-42B5-9A77-5D04AE5190AF} | 6 | Special purpose zone | None | None | 121766 | None | None | None | None | None | None | None | 4.114962 | 86.135341 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.083 5903713.690 33.721, 17... |
# 2 NAs in ZONE_resol are from a zone 58, which only has observations
aup_zones[aup_zones.ZONE == '58']
| WORKFLOWJO | last_edi00 | CONTOUR | created_da | DocumentUR | GlobalID | GROUPZONE | GROUPZONE_ | ID | NAME | OBJECTID | PARCEL_BAS | PARCEL_B00 | PRECINCT | PRECINCT_r | PRECINCTGR | PRECINCT00 | SCHEDULE | SHAPE_Area | SHAPE_Leng | STATUS | SUBPRECINC | SUBPRECI00 | SUBTYPE | SUBTYPE_re | TYPE | TYPE_resol | VALIDATION | VALIDATI00 | VERSIONSTA | VERSIONS00 | ZONE | ZONE_resol | ZONEHEIGHT | geometry | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 20853 | NaN | 20161111011 | None | 20160718211 | None | {2D83F680-9587-4D57-AF0B-CB7CA27C3D2C} | 6 | Special purpose zone | None | None | 20854 | None | None | None | None | None | None | None | 22.286945 | 72.629617 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.860 5903714.023 33.721, 17... |
| 121765 | NaN | 20161111010 | None | 20160718211 | None | {FD3E8BB4-1979-42B5-9A77-5D04AE5190AF} | 6 | Special purpose zone | None | None | 121766 | None | None | None | None | None | None | None | 4.114962 | 86.135341 | None | None | None | None | None | None | None | 3 | Valid and Public | 4 | Operative | 58 | None | NaN | POLYGON Z ((1767684.083 5903713.690 33.721, 17... |
rural = aup_zones[aup_zones.ZONE.isin(rural_codes)]
%%time
# method 1, slightly slower: dissolve by zone
rural_by_zone = rural.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, rural_row in rural_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(rural_row.geometry))
code_candidates.append(rural_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
rural_by_zone_dict = {code: rural[rural.ZONE == code].dissolve() for code in rural_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, rural_gdf in rural_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(rural_gdf.geometry[0]))
code_candidates.append(rural_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 56.2 s, sys: 289 ms, total: 56.5 s Wall time: 56.2 s
parcels_sample['Hdist_rural'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_rural_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_rural_name'] = parcels_sample.apply(lambda x: rural_code2name[x.Hdist_rural_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan')
name2colour = {name: colour for name, colour in zip(rural_code2name.values(), colours)}
column = 'Hdist_rural'
subsample = parcels_sample[parcels_sample[column] > 0].sample(5)
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
# ax = rural.plot(column='ZONE_resol', legend=True)
# subsample.plot(column='Hdist_rural_name', alpha=0.4, ax=ax)
ax = rural.plot(color=[name2colour[z] for z in rural.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_rural_name], alpha=0.65, ax=ax)
plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
import matplotlib
matplotlib.colors
<module 'matplotlib.colors' from '/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/matplotlib/colors.py'>
business_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('business - ', na=False)]['ZONE'].unique()
# check that each business zone code matches with a unique business zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in business_codes])
# dictionary mapping code to names
business_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in business_codes}
business_code2name
{'44': 'Business - Neighbourhood Centre Zone',
'12': 'Business - Mixed Use Zone',
'17': 'Business - Light Industry Zone',
'5': 'Business - Heavy Industry Zone',
'49': 'Business - General Business Zone',
'1': 'Business - Business Park Zone',
'22': 'Business - Town Centre Zone',
'10': 'Business - Metropolitan Centre Zone',
'7': 'Business - Local Centre Zone',
'35': 'Business - City Centre Zone'}
business = aup_zones[aup_zones.ZONE.isin(business_codes)]
%%time
# method 1, slightly slower: dissolve by zone
business_by_zone = business.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, business_row in business_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(business_row.geometry))
code_candidates.append(business_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
business_by_zone_dict = {code: business[business.ZONE == code].dissolve() for code in business_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, business_gdf in business_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(business_gdf.geometry[0]))
code_candidates.append(business_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 44.9 s, sys: 230 ms, total: 45.1 s Wall time: 44.8 s
parcels_sample['Hdist_bus'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_bus_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_bus_name'] = parcels_sample.apply(lambda x: business_code2name[x.Hdist_bus_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan', 'black', 'white')
name2colour = {name: colour for name, colour in zip(business_code2name.values(), colours)}
# hard to see, subset to smaller area
bounds = {'x1': 1.76e6, 'x2': 1.7613e6, 'y1': 5.9123e6, 'y2': 5.914e6}
column = 'Hdist_bus'
subsample = parcels_sample.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
business_plot = business.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
ax = business_plot.plot(color=[name2colour[z] for z in business_plot.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_bus_name], alpha=0.65, ax=ax)
# plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/geopandas/geodataframe.py:1322: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super(GeoDataFrame, self).__setitem__(key, value)
resid_codes = aup_zones[aup_zones.ZONE_resol.str.lower().str.contains('resid', na=False)]['ZONE'].unique()
resid_codes
array(['60', '23', '18', '8', '19', '20'], dtype=object)
# check that each resid zone code matches with a unique resid zone name
assert all([len(aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()) == 1 for code in resid_codes])
# dictionary mapping code to names
resid_code2name = {code: aup_zones[aup_zones.ZONE == code].ZONE_resol.unique()[0] for code in resid_codes}
resid_code2name
{'60': 'Residential - Mixed Housing Urban Zone',
'23': 'Residential - Large Lot Zone',
'18': 'Residential - Mixed Housing Suburban Zone',
'8': 'Residential - Terrace Housing and Apartment Building Zone',
'19': 'Residential - Single House Zone',
'20': 'Residential - Rural and Coastal Settlement Zone'}
resid = aup_zones[aup_zones.ZONE.isin(resid_codes)]
%%time
# method 1, slightly slower: dissolve by zone
resid_by_zone = resid.dissolve(by='ZONE').reset_index(drop=False)
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for index, resid_row in resid_by_zone.iterrows():
distance_candidates.append(row.geometry.distance(resid_row.geometry))
code_candidates.append(resid_row.ZONE)
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 1min 2s, sys: 363 ms, total: 1min 3s Wall time: 1min 2s
%%time
# method 2, slightly faster: dissolve all after subsetting gpd dfs
resid_by_zone_dict = {code: resid[resid.ZONE == code].dissolve() for code in resid_codes}
distances = []
codes = []
for index, row in tqdm(parcels_sample.iterrows(), total=len(parcels_sample)):
distance_candidates = []
code_candidates = []
for code, resid_gdf in resid_by_zone_dict.items():
distance_candidates.append(row.geometry.distance(resid_gdf.geometry[0]))
code_candidates.append(resid_gdf.ZONE[0])
distances.append(distance_candidates)
codes.append(code_candidates)
# all distances (to any zone)
distances = np.array(distances)
codes = np.array(codes)
# indices of minimum distances
min_idx = np.argmin(distances, axis=-1)
CPU times: user 2min 9s, sys: 509 ms, total: 2min 9s Wall time: 2min 8s
parcels_sample['Hdist_resid'] = distances[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_resid_code'] = codes[(np.arange(len(distances)), min_idx)]
parcels_sample['Hdist_resid_name'] = parcels_sample.apply(lambda x: resid_code2name[x.Hdist_resid_code], axis=1)
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
colours = ('blue', 'orange', 'green', 'red', 'purple', 'brown', 'pink', 'cyan', 'black', 'white')
name2colour = {name: colour for name, colour in zip(resid_code2name.values(), colours)}
# hard to see, subset to smaller area
bounds = {'x1': 1.76e6, 'x2': 1.7613e6, 'y1': 5.9123e6, 'y2': 5.914e6}
column = 'Hdist_resid'
subsample = parcels_sample.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
subsample = subsample[~subsample.is_empty]
resid_plot = resid.cx[bounds['x1']:bounds['x2'], bounds['y1']:bounds['y2']]
ax = resid_plot.plot(color=[name2colour[z] for z in resid_plot.ZONE_resol])
subsample.plot(color=[name2colour[z] for z in subsample.Hdist_resid_name], alpha=0.65, ax=ax)
# plt.ylim((5.88e6, 5.96e6))
# add legend
legend_patches = [mpatches.Patch(color=col, label=name) for name, col in name2colour.items()]
plt.legend(handles=legend_patches)
ctx.add_basemap(ax=ax, crs=2193)
/data/miniconda3/envs/house-upzone/lib/python3.8/site-packages/geopandas/geodataframe.py:1322: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super(GeoDataFrame, self).__setitem__(key, value)
Note: this is the real name for i: 'Residential - Terrace Housing and Apartment Building Zone'
%%time
postfix2name = {
'SH': 'Residential - Single House Zone',
'MHS': 'Residential - Mixed Housing Suburban Zone',
'MHU': 'Residential - Mixed Housing Urban Zone',
'THA': 'Residential - Terrace Housing and Apartment Building Zone'
}
for postfix, zone in tqdm(postfix2name.items()):
resid_gdf = resid[resid.ZONE_resol == zone].dissolve()
parcels_sample[f'Hdist_{postfix}'] = parcels_sample.apply(lambda x: x.geometry.distance(resid_gdf.geometry[0]), axis=1)
CPU times: user 1min 42s, sys: 78.1 ms, total: 1min 42s Wall time: 1min 42s
postfix = 'THA'
column = f'Hdist_{postfix}'
subsample = parcels_sample.sample(10)
subsample['buffer'] = subsample.apply(lambda x: x.geometry.buffer(x[column]), axis=1)
subsample['geometry'] = subsample['buffer']
ax = subsample.plot(color='red', alpha=0.4)
resid[resid.ZONE_resol == postfix2name[postfix]].plot(ax=ax)
ctx.add_basemap(ax, crs=2193)
sa2 = gpd.read_file('NZ-SA/statistical-area-2-2020-generalised.gdb').to_crs(2193)
sa2.sample(3)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, sa2[['SA22020_V1_00_NAME', 'SA22020_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'SA22020_V1_00_NAME': 'SA22018_name', 'SA22020_V1_00': 'SA22018_code'})
CPU times: user 8.29 s, sys: 3.88 ms, total: 8.29 s Wall time: 8.29 s
au2013 = gpd.read_file('input/area-unit-2013.gdb.zip').to_crs(2193)
au2013.sample(3)
| AU2013_V1_00 | AU2013_V1_00_NAME | AREA_SQ_KM | LAND_AREA_SQ_KM | Shape_Length | geometry | |
|---|---|---|---|---|---|---|
| 1481 | 573903 | Newlands North | 0.804087 | 0.804087 | 5958.640337 | MULTIPOLYGON (((174.82896 -41.21949, 174.83011... |
| 958 | 525202 | Kawakawa-Orere | 105.216001 | 105.216001 | 57437.556145 | MULTIPOLYGON (((175.14259 -36.93214, 175.14265... |
| 608 | 597102 | Inland Water-Lake Ellesmere South | 63.304671 | 0.000000 | 75233.330853 | MULTIPOLYGON (((172.57398 -43.76779, 172.57401... |
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, au2013[['AU2013_V1_00_NAME', 'AU2013_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'AU2013_V1_00_NAME': 'AU2013_name', 'AU2013_V1_00': 'AU2013_code'})
CPU times: user 6.96 s, sys: 0 ns, total: 6.96 s Wall time: 6.96 s
mb2018 = gpd.read_file('input/meshblock-2018-clipped-generalised.gdb.zip').to_crs(4326)
mb2018.sample(3)
| MB2018_V1_00 | LANDWATER | LANDWATER_NAME | LAND_AREA_SQ_KM | AREA_SQ_KM | SHAPE_Length | geometry | |
|---|---|---|---|---|---|---|---|
| 52865 | 4011926 | 12 | Mainland | 0.173710 | 0.173710 | 2129.422679 | MULTIPOLYGON (((174.91945 -36.94519, 174.92053... |
| 23832 | 1429800 | 12 | Mainland | 0.021918 | 0.021918 | 731.394363 | MULTIPOLYGON (((176.91682 -39.49291, 176.91702... |
| 15554 | 0759520 | 12 | Mainland | 0.020695 | 0.020695 | 928.355735 | MULTIPOLYGON (((174.91840 -37.01411, 174.91871... |
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = gpd.sjoin(parcels_sample, mb2018[['MB2018_V1_00', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MB2018_V1_00': 'MB2018_code'})
CPU times: user 11.3 s, sys: 8.2 ms, total: 11.3 s Wall time: 11.3 s
mb2013 = gpd.read_file('input/meshblock-2013.gdb.zip').to_crs(4326)
mb2013.sample(3)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample = gpd.sjoin(parcels_sample, mb2013[['MeshblockNumber', 'geometry']]).drop(columns=['index_right'])
parcels_sample = parcels_sample.rename(columns={'MeshblockNumber': 'MB2013_code'})
CPU times: user 11.4 s, sys: 7.05 ms, total: 11.4 s Wall time: 11.4 s
For these distance calculations, use EPSG 2193 (less distortion).
parcels_sample['geometry'] = parcels_sample['geometry_centroid']
parcels_sample = parcels_sample.to_crs(2193)
parcels_sample.sample(3)
| id | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | geometry_centroid | geometry_polygon | SA22018_name | SA22018_code | MB2013_code | MB2018_code | AU2013_name | AU2013_code | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 102782 | 4896031 | Lot 1 DP 61837 | DP 61837 | DCDB | Primary | None | North Auckland | NA17C/935 | 685.0 | 685.0 | POINT (1773608.374 5914596.096) | POINT (174.94849 -36.89867) | MULTIPOLYGON (((174.94827 -36.89874, 174.94841... | Cockle Bay | 153400 | 0655102 | 0655102 | Cockle Bay | 521502 |
| 119786 | 4931702 | Lot 4 DP 166054 | DP 166054 | DCDB | Primary | None | North Auckland | NA100D/34 | 52130.0 | 50575.0 | POINT (1783027.533 5889659.615) | POINT (175.06020 -37.12153) | MULTIPOLYGON (((175.06295 -37.12105, 175.06293... | Ararimu | 166400 | 0815302 | 0815302 | Hunua | 521132 |
| 362342 | 7415715 | Lot 23 DP 455616 | DP 455616, DP 462513 | Fee Simple Title | Primary | None | North Auckland | 586776 | 103.0 | 103.0 | POINT (1763989.858 5916157.717) | POINT (174.84025 -36.88632) | MULTIPOLYGON (((174.84016 -36.88629, 174.84020... | Stonefields West | 144900 | 0465305 | 4000293 | Stonefields | 517202 |
There are a few different datasets that could be used for this:
- NZ Coastlines (Topo 1:50k) https://data.linz.govt.nz/layer/50258-nz-coastlines-topo-150k/
- NZ Coastline - mean high water https://data.linz.govt.nz/layer/105085-nz-coastline-mean-high-water/
- NZ Coastlines and Islands Polygons (Topo 1:50k) https://data.linz.govt.nz/layer/51153-nz-coastlines-and-islands-polygons-topo-150k/
The first doesn't have islands (e.g. Waiheke).
The second is probably most appropriate.
%%time
bounds2193 = {'x1': 1.5e6, 'x2':2e6, 'y1':5.8e6, 'y2':6.1e6}
coastline = gpd.read_file('input/lds-nz-coastline-mean-high-water-FGDB.zip!nz-coastline-mean-high-water.gdb').to_crs(2193)
coastline = coastline.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
coastline_dissolved = coastline.dissolve()
%%time
parcels_sample['Hdist_coast'] = parcels_sample.apply(lambda x: x.geometry.distance(coastline_dissolved.geometry[0]), axis=1)
CPU times: user 14.3 s, sys: 0 ns, total: 14.3 s Wall time: 14.3 s
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['coast_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_coast), axis=1)
subsample['geometry'] = subsample['coast_buffer']
ax = subsample.plot(color='red', alpha=0.4)
coastline.cx[1.7e6:1.8e6, 5.85e6:5.97e6].plot(ax=ax)
<AxesSubplot:>
roads = gpd.read_file('input/lds-nz-road-centrelines-topo-150k-FGDB.zip!nz-road-centrelines-topo-150k.gdb').to_crs(2193)
roads = roads.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
highways = roads[~roads.hway_num.isna()]
highways_dissolved = highways.dissolve()
ax = highways.plot()
ctx.add_basemap(ax, crs=2193)
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_motorway'] = parcels_sample.apply(lambda x: x.geometry.distance(highways_dissolved.geometry[0]), axis=1)
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['highway_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_motorway), axis=1)
subsample['geometry'] = subsample['highway_buffer']
ax = subsample.plot(color='red', alpha=0.4)
highways.plot(ax=ax)
<AxesSubplot:>
railroads = gpd.read_file('input/lds-nz-railway-centrelines-topo-150k-SHP.zip').to_crs(2193)
railroads = railroads.cx[bounds2193['x1']:bounds2193['x2'], bounds2193['y1']:bounds2193['y2']]
railroads_dissolved = railroads.dissolve()
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_rail'] = parcels_sample.apply(lambda x: x.geometry.distance(railroads_dissolved.geometry[0]), axis=1)
# if distance work, then red circles should extend to the nearest coastline, and no further
subsample = parcels_sample.sample(10)
subsample['rail_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_rail), axis=1)
subsample['geometry'] = subsample['rail_buffer']
ax = subsample.plot(color='red', alpha=0.4)
railroads_dissolved.plot(ax=ax)
ctx.add_basemap(ax, crs=2193)
skytower = [-36.84838748948485, 174.7621736911587]
skytower = gpd.points_from_xy(x=[skytower[1]], y=[skytower[0]])
skytower = gpd.GeoDataFrame([{"name": "Skytower", "value": 1}], geometry=skytower, crs="EPSG:4326").to_crs(epsg=2193)
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['Hdist_skytower'] = parcels_sample.apply(lambda x: x.geometry.distance(skytower.geometry[0]), axis=1)
# if distance works, then red circles should extend to the nearest sky tower, and no further
subsample = parcels_sample.sample(10)
subsample['skytower_buffer'] = subsample.apply(lambda x: x.geometry.buffer(x.Hdist_skytower), axis=1)
subsample['geometry'] = subsample['skytower_buffer']
ax = subsample.plot(color='red', alpha=0.2)
skytower.plot(ax=ax, color='black')
coastline.cx[1.7e6:1.8e6, 5.85e6:5.97e6].plot(ax=ax)
<AxesSubplot:>
subsample
| id | appellation | affected_surveys | parcel_intent | topology_type | statutory_actions | land_district | titles | survey_area | calc_area | geometry | geometry_centroid | geometry_polygon | SA22018_name | SA22018_code | MB2013_code | MB2018_code | AU2013_name | AU2013_code | Hdist_coast | geometry_polygon_2193 | geometry_centroid_2193 | Hdist_skytower | Hdist_motorway | Hdist_rail | skytower_buffer | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 34646 | 4752644 | Lot 307 DP 60820 | DP 60820 | DCDB | Primary | None | North Auckland | NA16C/473 | 660.0 | 661.0 | POLYGON ((1784912.745 5916147.932, 1784844.166... | POINT (174.91519 -36.88522) | MULTIPOLYGON (((174.91515 -36.88504, 174.91528... | Bucklands Beach South | 149700 | 0680200 | 4005617 | Murvale | 522712 | 878.665121 | MULTIPOLYGON (((1770667.266 5916168.106, 17706... | POINT (1770670.737 5916147.932) | 14242.008197 | 7354.712217 | 5313.983934 | POLYGON ((1784912.744913341 5916147.932390111,... |
| 455036 | 7887241 | Lot 33 DP 514144 | DP 514144 | Fee Simple Title | Primary | None | North Auckland | 797267 | 391.0 | 391.0 | POLYGON ((1758926.629 5926816.353, 1758869.357... | POINT (174.64798 -36.79307) | MULTIPOLYGON (((174.64809 -36.79296, 174.64813... | Whenuapai | 117000 | 0221801 | 0221801 | Whenuapai West | 513410 | 343.250278 | MULTIPOLYGON (((1747043.074 5926828.520, 17470... | POINT (1747032.752 5926816.353) | 11893.877215 | 178.382807 | 7862.306454 | POLYGON ((1758926.629288101 5926816.352729866,... |
| 376905 | 8069623 | Lot 3 DP 532972 | DP 532972 | Fee Simple Title | Primary | None | North Auckland | 879022 | 130.0 | 131.0 | POLYGON ((1791002.655 5906606.755, 1790907.346... | POINT (174.92339 -36.97108) | MULTIPOLYGON (((174.92329 -36.97110, 174.92331... | Ormiston South | 158100 | 0713100 | 4000310 | Ormiston | 523713 | 4223.762201 | MULTIPOLYGON (((1771200.606 5906604.885, 17712... | POINT (1771209.552 5906606.755) | 19793.102462 | 3724.092461 | 4827.701181 | POLYGON ((1791002.654879873 5906606.754567775,... |
| 531733 | 4792640 | Lot 1 DP 65366 | DP 65366 | DCDB | Primary | None | North Auckland | NA22B/241 | 675.0 | 677.0 | POLYGON ((1776353.981 5914063.725, 1776302.469... | POINT (174.85940 -36.90489) | MULTIPOLYGON (((174.85931 -36.90468, 174.85960... | Panmure East | 147100 | 0629500 | 0629500 | Panmure Basin | 520602 | 89.557343 | MULTIPOLYGON (((1765648.605 5914087.794, 17656... | POINT (1765656.200 5914063.725) | 10697.781606 | 2118.584953 | 1173.249365 | POLYGON ((1776353.981298447 5914063.725222711,... |
| 149659 | 4993933 | Lot 2 DP 145436 | DP 145436 | DCDB | Primary | None | North Auckland | NA86B/282 | 346.0 | 347.0 | POLYGON ((1762374.646 5918585.080, 1762360.297... | POINT (174.78820 -36.86523) | MULTIPOLYGON (((174.78830 -36.86520, 174.78848... | Remuera West | 139400 | 0481101 | 0481101 | Remuera West | 516101 | 169.917034 | MULTIPOLYGON (((1759403.857 5918588.240, 17594... | POINT (1759394.693 5918585.080) | 2979.953125 | 1269.478099 | 556.151085 | POLYGON ((1762374.646026213 5918585.080392795,... |
| 279043 | 6587485 | Lot 24 DP 211992 | DP 211992 | Fee Simple Title | Primary | None | North Auckland | NA139D/795 | 472.0 | 472.0 | POLYGON ((1808610.902 5880993.094, 1808413.950... | POINT (174.88981 -37.20248) | MULTIPOLYGON (((174.88970 -37.20235, 174.88997... | Pukekohe West | 165500 | 0820703 | 0820703 | Pukekohe West | 525921 | 9026.995468 | MULTIPOLYGON (((1767700.028 5881007.276, 17677... | POINT (1767709.358 5880993.094) | 40901.544737 | 2163.684729 | 1513.646376 | POLYGON ((1808610.902297848 5880993.094492346,... |
| 456299 | 4728136 | Lot 44 DP 51920 | DP 51920 | DCDB | Primary | None | North Auckland | NA1D/649 | 1176.0 | 1176.0 | POLYGON ((1778572.097 5905626.256, 1778495.615... | POINT (174.82793 -36.98143) | MULTIPOLYGON (((174.82772 -36.98155, 174.82741... | Māngere South East | 152800 | 0729100 | 0729100 | Mangere South | 524200 | 730.810779 | MULTIPOLYGON (((1762670.112 5905613.727, 17626... | POINT (1762688.853 5905626.256) | 15883.244242 | 62.208802 | 1853.397499 | POLYGON ((1778572.097154952 5905626.256242074,... |
| 219163 | 5140857 | Lot 7 DP 205212 | DP 205212 | DCDB | Primary | None | North Auckland | NA133D/16 | 41550.0 | 41119.0 | POLYGON ((1818586.669 5995149.697, 1818220.399... | POINT (174.58485 -36.17796) | MULTIPOLYGON (((174.58346 -36.17695, 174.58606... | Cape Rodney | 110400 | 0135400 | 0135400 | Cape Rodney | 506660 | 3796.489937 | MULTIPOLYGON (((1742399.119 5995263.184, 17426... | POINT (1742522.391 5995149.697) | 76064.277754 | 9072.511140 | 10242.355775 | POLYGON ((1818586.669035351 5995149.696901722,... |
| 129894 | 4953555 | Part Lot 3 DP 10889 | DP 10889 | DCDB | Primary | None | North Auckland | NA316/80 | 121.0 | 99.0 | POLYGON ((1757125.496 5920083.306, 1757100.372... | POINT (174.70394 -36.85297) | MULTIPOLYGON (((174.70375 -36.85298, 174.70375... | Point Chevalier West | 129200 | 0394200 | 0394200 | Point Chevalier East | 515002 | 58.497795 | MULTIPOLYGON (((1751891.038 5920082.478, 17518... | POINT (1751907.996 5920083.306) | 5217.499595 | 1943.467599 | 1145.399750 | POLYGON ((1757125.49554849 5920083.305817503, ... |
| 29904 | 4742525 | Lot 1 DP 59316 | DP 59316 | DCDB | Primary | None | North Auckland | NA14B/149 | 658.0 | 657.0 | POLYGON ((1783029.572 5906495.039, 1782948.954... | POINT (174.86815 -36.97297) | MULTIPOLYGON (((174.86808 -36.97281, 174.86830... | Papatoetoe South | 156200 | 0668801 | 0668801 | Papatoetoe East | 522302 | 1978.819715 | MULTIPOLYGON (((1766281.499 5906513.298, 17663... | POINT (1766287.388 5906495.039) | 16742.184100 | 702.500256 | 1716.278248 | POLYGON ((1783029.572144879 5906495.038838989,... |
Indicator (1 or 0) for consent located in SpHAs SpHA_indicator
Note: here I've used centroids. Maybe should use parcel polygons instead.
spha = gpd.read_file('input/AC_Special_Housing_Area.zip').to_crs(2193)
spha_dissolved = spha.dissolve()
assert(len(spha_dissolved) == 1)
%%time
parcels_sample['geometry'] = parcels_sample['geometry_centroid_2193']
parcels_sample['SpHA_indicator'] = parcels_sample.apply(lambda x: spha_dissolved.geometry.contains(x.geometry), axis=1)
# if distance works, then red circles should extend to the nearest sky tower, and no further
subsample = parcels_sample.sample(500)
ax=subsample.plot(column='SpHA_indicator', alpha=0.6)
plt.ylim((5.89e6, 5.95e6))
plt.xlim((1.73e6, 1.78e6))
spha_dissolved.boundary.plot(ax=ax)
ctx.add_basemap(ax, crs=spha_dissolved.crs)